# set working directory
setwd('~/replication_files')

# function to load and install packages
load_packages<-function(packages){
  sapply(packages,function(x){
    f<-require(x,character.only=T)
    if(!f) install.packages(x); f<-require(x,character.only=T)
    return(f)},USE.NAMES=T)
}

# load packages
load_packages(c('data.table','gtools','sandwich','miceadds'))

# format numbers to specific decimal places
specify_decimal<-function(x,ndec=3,drop0leading=F,drop0trailing=F,comma=F){
  f<-format(round(x,ndec),nsmall=ndec,drop0trailing=drop0trailing,scientific=F,big.mark=ifelse(comma,",",""))
  if(drop0leading){
    f<-sub("0.",".",f,fixed=T)
  }
  return(trimws(f))
}

# create stars depending on p-value
stars_pval<-function (p.value) {
  unclass(symnum(p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", "")))
}

# read data
(FinDT<-readRDS('TablesA2A3.rds'))

# specify dependent variables for models
DVs<-c('provax_bin','provax_count','antivax_bin','antivax_count')
# get number of models
(Nmods<-length(DVs))

# specify independent variables
(IVs_main<-rep(list(c('college','female','nonwhite','age30t44','age45t59','age60p','parent','caseid_file_date_N','log(caseid_N)','log(FB_N + 1)')),4))
# add data name
(IVs_all<-lapply(IVs_main,function(x) c(x,'Data_Name')))

# create formulas
(formulas<-lapply(1:Nmods,function(x) as.formula(paste0(DVs[x],'~',paste(IVs_all[[x]],collapse='+')))))

# not sure why but running lm.cluster first outside of lapply seems to be necessary to get it to run inside lapply:
lm.cluster(data=FinDT,formula=formulas[[1]],cluster='resp_id')

# list of model results
modlist<-lapply(1:Nmods,function(x){
  # linear model clustered on respondent
  mod<-miceadds::lm.cluster(data=FinDT,formula=formulas[[x]],cluster='resp_id')
  # summary of model
  modsum<-summary(mod)
  # coefficients
  coefs<-modsum[c(IVs_main[[x]],'(Intercept)'),]
  
  # r^2
  (R2<-specify_decimal(summary(mod$lm_res)$r.squared,3))
  # model N
  (N<-paste0('\\multicolumn{1}{l}{',formatC(length(summary(mod$lm_res)$residuals),big.mark=','),'}'))
  
  # reformat coefficients
  (Est<-specify_decimal(coefs[,1],3))
  # reformat standard errors
  (SE<-paste0('(',specify_decimal(coefs[,2],3),')'))
  # transform p value to star
  (p<-stars_pval(coefs[,4]))
  # add star to estimate
  (Estp<-paste0(Est,'{$^{',p,'}$}'))
  # create data.table of results
  (dt<-data.table(rn=row.names(coefs),Estp=Estp,SE=SE))
  # set names
  setnames(dt,2:3,paste0(c('Estp','SE'),x))
  # return list of data.table, r^2, and N
  list(dt=dt,R2=R2,N=N)
})

# data.tables from model
(dtlist<-lapply(modlist,`[[`,1))

# merge data.tables together
(dtmerge<-Reduce(function(x,y) merge(x,y,by='rn',all=T),dtlist))
# create ordering data.table
(dtorder<-data.table(rn=c('college','female','nonwhite','age30t44','age45t59','age60p','parent','caseid_file_date_N','log(caseid_N)','log(FB_N + 1)','(Intercept)')))
set(dtorder,NULL,'order',1:nrow(dtorder))

# merge results with ordering data.table
(dtmerge<-merge(dtmerge,dtorder,by='rn'))
# reorder
setorder(dtmerge,order)

# columns to fix
(cols2fix<-setdiff(names(dtmerge),c('rn','order')))
# set missing to ''
for(x in cols2fix){
  set(dtmerge,dtmerge[,.I[is.na(get(x))]],which(names(dtmerge)==x),'')
}

# transform to latex table
(tabMain<-lapply(1:Nmods,function(x) c(rbind(dtmerge[[paste0('Estp',x)]],dtmerge[[paste0('SE',x)]]))))
(tabMain<-do.call(cbind,tabMain))
(tabMain<-apply(tabMain,1,function(x) paste(x,collapse=' & ')))

# names of coefficients for table
tabCoefs<-c('College','~graduate','Female','','Nonwhite','','Age 30--44','','Age 45--59','','Age 60+','','Parent','','Days in','~panel','log(URLs','~visited)','log(Facebook','~pages)','Constant','')

# add coefficient names to table
(tabMain<-paste(tabCoefs,tabMain,sep=' & '))
(tabMain<-paste0(tabMain,' \\\\'))
###

# get model r squareds
(modR2s<-lapply(modlist,`[[`,2))
# get model Ns
(modNs<-lapply(modlist,`[[`,3))

# create part of table to go under estimates
(tabBottom<-cbind(c('$R^2$','$N$'),rbind(unlist(modR2s),unlist(modNs))))
(tabBottom<-apply(tabBottom,1,function(x) paste(x,collapse=' & ')))
(tabBottom<-paste(tabBottom,'\\\\'))

# put estimates and model diagnostics together
(tabContent<-c(tabMain,'\\midrule',tabBottom))

# top of Latex table
tab_head<-c('\\begin{table}[!p]',
            '\\caption{Facebook use and online vaccine information exposure (unweighted)}',
            '\\begin{center}',
            '\\begin{threeparttable}',
            '\\begin{tabular}{l *{4}{S[table-format=1.3,table-space-text-post={$^{***}$}]} }',
            '\\toprule',
            '& \\multicolumn{2}{c}{\\myuline{Non-skeptical webpages}} & \\multicolumn{2}{c}{\\myuline{Vaccine-skeptical webpages}} \\\\',
            '& \\multicolumn{1}{c}{Visited $\\geq 1$ page} & \\multicolumn{1}{c}{Total pages visited} & \\multicolumn{1}{c}{Visited $\\geq 1$ page} & \\multicolumn{1}{c}{Total pages visited} \\\\',
            '\\midrule')

# notes for Latex table
tab_notes<-paste('\\item \\leavevmode\\kern-\\scriptspace\\kern-\\labelsep',
                 '$^{*} p < 0.05$, $^{**} p < .01$, $^{***} p<.001$ (two-sided); OLS models with standard errors clustered by respondent. All models include panel fixed effects. Data from YouGov Pulse panel participants with online traffic data available. Each observation represents behavioral web traffic data associated with responses to one of seven national surveys conducted from 2016--2019.')

# bottom of Latex table
tab_foot<-c('\\bottomrule',
            '\\end{tabular}',
            '\\begin{tablenotes}[flushleft]',
            tab_notes,
            '\\end{tablenotes}',
            '\\end{threeparttable}',
            '\\end{center}',
            '\\label{table:taba3}',
            '\\end{table}')

# put strings together
(tab_fin<-c(tab_head,tabContent,tab_foot))

# save table
writeLines(tab_fin,'TableA3.tex')
